In [2]:
import string
import copy
import scipy
import Tkinter, tkFileDialog
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import os
import re
import sys
import cPickle
import glob
sys.path.append(os.path.abspath("C:\Users\Scherer Lab E\Documents\GitHub\Python_Data_Analysis"))
import common_functions
%matplotlib inline
C:\Anaconda\lib\site-packages\skimage\viewer\utils\core.py:10: UserWarning: Recommended matplotlib backend is `Agg` for full skimage.viewer functionality.
  warn("Recommended matplotlib backend is `Agg` for full "
In [3]:
cd "K:\Pat's_Projects\ParticleTrajectoryData"
K:\Pat's_Projects\ParticleTrajectoryData
In [4]:
store = pd.HDFStore('half_nanoplate_dynamics_processed.h5', mode='r')
In [5]:
df_dict = dict([(key,store.get(key)) for key in store.index['key']])
panel = pd.Panel(df_dict)

Rate of Particles Entering/Leaving Plate

Rate Entering Plate

To determine when a particle enters the plate a small section of the positions are chosen based on theta around the barrier for the plate. That area is monitored and when a particle appears over the plate then it counts as an event.

In [6]:
frame_rate = 90.0
keys = store.index['key']
results = pd.DataFrame()
results['L'] = store.index['L']
for i,key in enumerate(keys):
    df = store.get(key).copy()
    df = df.query('75 < theta < 120')
    df = df[df['over_plate'] == True]
    df = df.drop_duplicates(subset=['frame', 'track id'], keep='first')
    entering_plate = df.groupby('track id', group_keys=False).apply(lambda x: x[(x.frame - x.frame.shift(1) > 3) | pd.isnull(x.frame - x.frame.shift(1))])
    results.loc[i,'Experiment'] = key
    results.loc[i, '# Events'] = len(entering_plate)
    results.loc[i, 'Number of Frames'] = 2000
    results.loc[i, 'Frame Rate (fps)'] = 90
    results.loc[i, 'Events/Sec'] = len(entering_plate)*frame_rate/2000.0
results
Out[6]:
L Experiment # Events Number of Frames Frame Rate (fps) Events/Sec
0 1 Exp01201401/Mov_01201403 39.0 2000.0 90.0 1.755
1 2 Exp01201401/Mov_01201404 136.0 2000.0 90.0 6.120
2 3 Exp01201401/Mov_01201405 176.0 2000.0 90.0 7.920
3 4 Exp01201401/Mov_01201406 466.0 2000.0 90.0 20.970
4 5 Exp01201401/Mov_01201407 424.0 2000.0 90.0 19.080
5 10 Exp01201401/Mov_01201408 2194.0 2000.0 90.0 98.730

Rate Leaving Plate

To determine when a particle leaves a plate a small section of the particle positions are chosen in theta. Then each track is checked to see when a particle goes from being on the plate to being off the plate.

In [7]:
frame_rate = 90.0
keys = store.index['key']
results = pd.DataFrame()
results['L'] = store.index['L']
def check_over_plate_in_past(group):
    '''Returns the entries where particles that are over glass were just over 
    the plate last time they could be identified. This should be used with a 
    groupby on a data frame grouped by 'track id'. 
    
    :pram group: The group from the groupby operation, should be no duplicate
    entries w.r.t. frame and track id.
    '''
    new_group = group.iloc[1:]
    cur_bool = group.over_plate.iloc[1:]
    fut_bool = group.over_plate.shift(1).iloc[1:].astype(np.bool)
    #print len(cur_bool), len(fut_bool)
    #print ~cur_bool & fut_bool
    return new_group[(~cur_bool & fut_bool)]

for i,key in enumerate(keys):
    df = store.get(key).copy()
    df = df.query('230 < theta < 300')
    #df = df[df['over_plate'] == False]
    df = df.drop_duplicates(subset=['frame', 'track id'], keep='first')
    #leaving_plate = df.groupby('track id', group_keys=False).apply(lambda x: x[(x.frame - x.frame.shift(1) > 3) | pd.isnull(x.frame - x.frame.shift(1))])
    #leaving_plate = df.groupby('track id', group_keys=False).apply(lambda x: x[x.over_plate.reset_index() & ~pd.notnull(x.over_plate.shift(1).reset_index())])
    leaving_plate = df.groupby('track id', group_keys=False).apply(check_over_plate_in_past)
    #print leaving_plate
    
    results.loc[i,'Experiment'] = key
    results.loc[i, '# Events'] = len(leaving_plate)
    results.loc[i, 'Number of Frames'] = 2000
    results.loc[i, 'Frame Rate (fps)'] = 90
    results.loc[i, 'Events/Sec'] = len(leaving_plate)*frame_rate/2000.0
results
Out[7]:
L Experiment # Events Number of Frames Frame Rate (fps) Events/Sec
0 1 Exp01201401/Mov_01201403 29.0 2000.0 90.0 1.305
1 2 Exp01201401/Mov_01201404 89.0 2000.0 90.0 4.005
2 3 Exp01201401/Mov_01201405 131.0 2000.0 90.0 5.895
3 4 Exp01201401/Mov_01201406 297.0 2000.0 90.0 13.365
4 5 Exp01201401/Mov_01201407 304.0 2000.0 90.0 13.680
5 10 Exp01201401/Mov_01201408 1145.0 2000.0 90.0 51.525

Distribution around Edge (Over Plate and Over Glass)

Here the distribution of particle positions for each experiment are shown as the particles' position in theta. The plate is at theta ~275 degrees. The bottom axis shows the position in theta while the top axis shows the position from 275 in microns (calculated from arc length). The average R is used for calculating the arc length which led to some minor changes in the um conversion for the different experiments.

In [9]:
from scipy.stats import norm

lower_theta_cutoff = [250, 260, 262, 264, 263, 265]
um_conv=6.5/60/1.6/2
for idx, key in enumerate(store.index['key']):
    df = store.get(key)
    fig = plt.figure(figsize=[8,3.5])
    
    ax1 = fig.add_subplot(1, 2, 1)
    
    # The entire DataFrame is used to find the r_avg
    r_avg = df.r.mean()
    
    # For the over plate plot
    theta_subset = df.query('240 < theta < 275')
    theta_subset = theta_subset.drop_duplicates(subset=['frame', 'track id'], keep='first')
    hist, bin_edges, patches = ax1.hist(theta_subset['theta'].values, bins=30, range=[240,275])
    ax1.set_xlabel('Theta (degrees)')
    ax1.set_ylabel('Counts')
    ax2 = ax1.twiny()
    ax2.set_xlim(ax1.get_xlim())
    ax2.set_xticks(np.arange(240, 282, 7))
    ax2.set_xticklabels(["%.1f" % v for v in np.arange(35, -7, -7)*(np.pi/180.0)*r_avg*um_conv])
    ax2.set_xlabel('$\mu$m from edge')
    
    t1 = ax1.set_title(key+'\nArea Before Edge Over Plate')
    
    # For the over glass plot
    ax3 = fig.add_subplot(1, 2, 2)
    theta_subset = df.query('275 < theta < 310')
    theta_subset = theta_subset.drop_duplicates(subset=['frame', 'track id'], keep='first')
    hist, bin_edges, patches = plt.hist(theta_subset['theta'].values, bins=30, range=[275,310])
    t2 = plt.title(key+'\nArea After Edge Over Glass')
    plt.ylabel('Counts')
    plt.xlabel('Theta (degrees)')
    ax4 = ax3.twiny()
    ax4.set_xlim(ax3.get_xlim())
    ax4.set_xticks(np.arange(275, 317, 7))
    ax4.set_xticklabels(["%.1f" % v for v in np.arange(0, 42, 7)*(np.pi/180.0)*r_avg*um_conv])
    ax4.set_xlabel('$\mu$m from edge')
    
    
    plt.tight_layout()
    t1.set_y(1.17)
    t2.set_y(1.17)
    plt.show()

Find which particles just left plate and just entered glass

In order to make the data easier to analyze the frames when particles leave the plate and when they first enter the glass need to be identified.

In [10]:
frame_rate = 90.0
keys = store.index['key']
results = pd.DataFrame()
results['L'] = store.index['L']
def check_over_glass_future(group):
    '''Returns the entries where particles that are over the plate will be found
    over the glass the next time they could be identified. This should be used 
    with a groupby on a data frame grouped by 'track id'. 
    
    :pram group: The group from the groupby operation, should be no duplicate
    entries w.r.t. frame and track id.
    '''
    new_group = group.iloc[:-1]
    cur_bool = group.over_plate.iloc[:-1]
    fut_bool = group.over_plate.shift(-1).iloc[:-1].astype(np.bool)
    #print len(cur_bool), len(fut_bool)
    #print ~cur_bool & fut_bool
    #print new_group[(cur_bool & ~fut_bool)]
    return new_group[(cur_bool & ~fut_bool)]

kinetic_data = []
for i,key in enumerate(keys):
    df = store.get(key).copy()
    df_subset = df.query('220 < theta < 300')
    df_subset = df_subset.drop_duplicates(subset=['frame', 'track id'], keep='first')
    
    # Find the a transitioning particles' last frame on plate
    leaving_plate = df_subset.groupby('track id', group_keys=False).apply(check_over_glass_future)
    #df['last_frame_plate'] = df.frame.isin(leaving_plate.frame) & df['track id'].isin(leaving_plate['track id'])
    index_leaving_plate = pd.merge(df, leaving_plate, on=['frame', 'track id'], right_index=True).index
    df['last_frame_plate'] = False
    df.loc[index_leaving_plate, 'last_frame_plate'] = True
    
   
    # Find the a transitioning particles' first frame on glass
    entering_glass = df_subset.groupby('track id', group_keys=False).apply(check_over_plate_in_past)
    #df['first_frame_glass'] = df.frame.isin(entering_glass.frame) & df['track id'].isin(entering_glass['track id'])
    index_entering_glass = pd.merge(df, entering_glass, on=['frame', 'track id'], right_index=True).index
    df['first_frame_glass'] = False
    df.loc[index_entering_glass, 'first_frame_glass'] = True
    
    kinetic_data.append(df)
In [11]:
Ls = [1,2,3,4,5,10]
for num,v in enumerate(kinetic_data):
    test = v.query('220 < theta < 275')
    test = test.drop_duplicates(subset=['frame', 'track id'], keep='first')
    plt.hist(test['theta'].values, bins=30, range=[220,275], normed=True, alpha=0.5)
    plt.hist(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[220,275], normed=True, alpha=0.5)
    plt.legend(['Positions', 'Positions Successes'], loc='upper left')
    plt.ylabel('Normed Probability Density')
    plt.xlabel('Position Theta (degrees)')
    plt.title('Positions and Positions of Success L='+str(Ls[num]))
    plt.show()

Compare Attempts to Successes of Transition

Using the regions shown below as regions where attempts at a transition are being made we can compare the number of times a transition is successful over the number of times it was attempted (i.e. the number of frames the particle is in the defined region). This will give us a probability of the particle making the transition.

In [12]:
'''As the drive force increases there is a possibility that several particles 
make the transition at the same time. This is the mean of the number of particles
that will make the transition on any given frame that has at least one transition
(i.e. if only one particle goes at a time this will be its lowest value of 1.0).'''

for v in kinetic_data:
    test = v[v.last_frame_plate == True]
    test = test.drop_duplicates(subset=['frame', 'track id'], keep='first')
    num_succ = test.groupby('frame').apply(len)
    print num_succ.mean()
1.0
1.0
1.00769230769
1.06071428571
1.05923344948
1.3018018018

Attempts vs Successes using min bin as cutoff

Use the minimum bin (in theta) for a success to set the cutoff of the histogram for attempts

In [42]:
#lower_theta_cutoff = [262, 256, 250, 250, 250, 230]
upper_theta_cutoff = [273.42,
                     273.50,
                     274.14,
                     273.68,
                     274.24,
                     273.94]
plt.figure(figsize=(8,8))
results_df = pd.DataFrame()
results_df['L'] = store.index['L']
for num, v in enumerate(kinetic_data):
    #theta_cutoff = lower_theta_cutoff[num]
    
    # Set the theta cutoff to the minimum theta of a particle that will transition
    theta_query = v.query('230 < theta < 300')
    theta_cutoff = theta_query[theta_query.last_frame_plate == True].theta.min()
    test = v.query('@theta_cutoff < theta < 300')
    test = test.drop_duplicates(subset=['frame', 'track id'], keep='first')
    
    # Histograms of each
    hist_pos, bins_pos = np.histogram(test['theta'].values, bins=30, range=[theta_cutoff,275])
    hist_succ, bins_succ = np.histogram(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275])
    
    # Plotting the normed histograms of each
    plt.subplot(3,2,num+1)
    plt.hist(test['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    plt.hist(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    
    plt.ylabel('Normed Probability Density')
    plt.xlabel('Position Theta (degrees)')
    
    
    tot_pos = sum(hist_pos)
    tot_succ = sum(hist_succ)
    results_df.loc[num, 'num_attempts'] = tot_pos
    results_df.loc[num, 'num_successes'] = tot_succ
    results_df.loc[num, 'probability'] = tot_succ/float(tot_pos)
    
plt.legend(['Positions', 'Positions Successes'], loc='upper left')
plt.tight_layout()
plt.show()
save_dir = "C:\Users\Scherer Lab E\Documents\IPython Notebooks\Half Nanoplate Analysis\Data"
results_df.to_pickle(save_dir+"\\attempts_succ_min_theta_cutoff.pandas")
results_df
Out[42]:
L num_attempts num_successes probability
0 1 180.0 28.0 0.155556
1 2 1304.0 88.0 0.067485
2 3 669.0 120.0 0.179372
3 4 1412.0 287.0 0.203258
4 5 1072.0 297.0 0.277052
5 10 2427.0 1144.0 0.471364

Attempts vs Successes using a defined cutoff (230 degrees)

Set the cutoff for the attempts histogram for all experiments to be the same at 230 degrees

In [29]:
#lower_theta_cutoff = [262, 256, 250, 250, 250, 230]
plt.figure(figsize=(8,8))
results_df = pd.DataFrame()
results_df['L'] = store.index['L']
for num, v in enumerate(kinetic_data):
    theta_cutoff = 230
    
    # Set the theta cutoff to 230 degrees
    theta_query = v.query('230 < theta < 275')
    test = v.query('@theta_cutoff < theta < 275')
    test = test.drop_duplicates(subset=['frame', 'track id'], keep='first')
    
    # Histograms of each
    hist_pos, bins_pos = np.histogram(test['theta'].values, bins=30, range=[theta_cutoff,275])
    hist_succ, bins_succ = np.histogram(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275])
    
    # Plotting the normed histograms of each
    plt.subplot(3,2,num+1)
    plt.hist(test['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    plt.hist(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    plt.ylabel('Normed Probability Density')
    plt.xlabel('Position Theta (degrees)')
    
    tot_pos = sum(hist_pos)
    tot_succ = sum(hist_succ)
    results_df.loc[num, 'num_attempts'] = tot_pos
    results_df.loc[num, 'num_successes'] = tot_succ
    results_df.loc[num, 'probability'] = tot_succ/float(tot_pos)
    
plt.legend(['Positions', 'Positions Successes'], loc='upper left')
plt.tight_layout()
plt.show()
save_dir = "C:\Users\Scherer Lab E\Documents\IPython Notebooks\Half Nanoplate Analysis\Data"
results_df.to_pickle(save_dir+"\\attempts_succ_set_230_cutoff.pandas")
results_df
Out[29]:
L num_attempts num_successes probability
0 1 2024.0 29.0 0.014328
1 2 1725.0 89.0 0.051594
2 3 1075.0 121.0 0.112558
3 4 1483.0 288.0 0.194201
4 5 1220.0 298.0 0.244262
5 10 2435.0 1145.0 0.470226

Attempts vs Successes Set Cutoff Manually at Peaks

Choose values of theta where only the entire peak of the attempts distribution is present. These values are set by hand

In [30]:
lower_theta_cutoff = [251, 259, 262, 263, 261, 260]
plt.figure(figsize=(8,8))
results_df = pd.DataFrame()
results_df['L'] = store.index['L']
for num, v in enumerate(kinetic_data):
    theta_cutoff = lower_theta_cutoff[num]
    
    # Set the theta cutoff to the hand picked value
    theta_query = v.query('230 < theta < 275')
    test = v.query('@theta_cutoff < theta < 275')
    test = test.drop_duplicates(subset=['frame', 'track id'], keep='first')
    
    # Histograms of each
    hist_pos, bins_pos = np.histogram(test['theta'].values, bins=30, range=[theta_cutoff,275])
    hist_succ, bins_succ = np.histogram(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275])
    
    # Plotting the normed histograms of each
    plt.subplot(3,2,num+1)
    plt.hist(test['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    plt.hist(test[test['last_frame_plate']==True]['theta'].values, bins=30, range=[theta_cutoff,275], normed=True, alpha=0.5)
    plt.ylabel('Normed Probability Density')
    plt.xlabel('Position Theta (degrees)')
    
    tot_pos = sum(hist_pos)
    tot_succ = sum(hist_succ)
    results_df.loc[num, 'num_attempts'] = tot_pos
    results_df.loc[num, 'num_successes'] = tot_succ
    results_df.loc[num, 'probability'] = tot_succ/float(tot_pos)
    
plt.legend(['Positions', 'Positions Successes'], loc='upper left')
plt.tight_layout()
plt.show()
save_dir = "C:\Users\Scherer Lab E\Documents\IPython Notebooks\Half Nanoplate Analysis\Data"
results_df.to_pickle(save_dir+"\\attempts_succ_hand_peak_cutoff.pandas")
results_df
Out[30]:
L num_attempts num_successes probability
0 1 1667.0 29.0 0.017397
1 2 1268.0 88.0 0.069401
2 3 637.0 117.0 0.183673
3 4 689.0 261.0 0.378810
4 5 558.0 267.0 0.478495
5 10 834.0 764.0 0.916067

Add the Status of the Nearest Neighbors to the DataFrame

To make analysis easier for determining if it is the particles on the plate or on the glass that affect the success rate of transition, these functions add two columns that tell if the nearest neighbor is over the plate. One column represents the nearest neighbor determined by cartesian coordinates while the other is respect to the nearest neighbor w.r.t. the reaction coordinate (theta only). These columns let us easily categorize the nearest neighbors using boolean comparisons.

In [16]:
def add_nn_id_over_plate_status(data_frame):
    '''Adds a column that tells if the nearest neighbor for each entry is over
    the plate or not. The nearest neighbor ids used here are the ones for 
    cartesian coordinates.
    
    :pram data_frame: A data frame of trajectories featuring multi-index for 
    nearest neighbors and also includes a column that tells if the particle is
    over the nanoplate or not.'''
    temp_df = data_frame.copy()
    temp_df_subset = temp_df[['frame', 'particle id', 'track id', 'over_plate','nn_num' , 'nn_id']]
    temp_df_no_dupes = temp_df_subset.drop_duplicates(subset=['frame', 'track id'], keep='first')
    # Merge the data frames on specific values in order to get the status of the NN to line up
    merged = temp_df_subset.merge(temp_df_no_dupes, left_on=['frame','nn_id'], right_on=['frame','track id'])
    # Sort the values like they were before the merge
    merged = merged.sort_values(['frame', 'particle id_x', 'nn_num_x']).reset_index(drop=True)
    temp_df['nn_over_plate'] = merged['over_plate_y']
    return temp_df

def add_theta_nn_id_over_plate_status(data_frame):
    '''Adds a column that tells if the nearest neighbor for each entry is over
    the plate or not. The nearest neighbor ids used here are the ones for polar
    coordinates in theta only (no r).
    
    :pram data_frame: A data frame of trajectories featuring multi-index for 
    nearest neighbors and also includes a column that tells if the particle is
    over the nanoplate or not.'''
    temp_df = data_frame.copy()
    temp_df_subset = temp_df[['frame', 'particle id', 'track id', 'over_plate','theta_nn_num' , 'theta_nn_id']]
    temp_df_no_dupes = temp_df_subset.drop_duplicates(subset=['frame', 'track id'], keep='first')
    # Merge the data frames on specific values in order to get the status of the NN to line up
    merged = temp_df_subset.merge(temp_df_no_dupes, left_on=['frame','theta_nn_id'], right_on=['frame','track id'])
    # Sort the values like they were before the merge
    merged = merged.sort_values(['frame', 'particle id_x', 'theta_nn_num_x']).reset_index(drop=True)
    temp_df['theta_nn_over_plate'] = merged['over_plate_y']
    return temp_df
    
for idx,p in enumerate(kinetic_data):
    p = add_nn_id_over_plate_status(p)
    kinetic_data[idx] = add_theta_nn_id_over_plate_status(p)

Effects of Nearest Neighbor on Successful Jumps off the Plate

First Nearest Neighbor Distance when Transition is Made

This is simply the distribution of the first nearest neighbor distance of a particle near the plate edge the frame before it leaves the plate.

In [17]:
import optical_binding_of_driven_particles

Ls = [1, 2, 3, 4, 5, 10]
um_conv=6.5/60/1.6/2
plt.figure(figsize=(8,7))
for num, df in enumerate(kinetic_data):
    df_copy = df.copy()
    leaving_plate = df_copy[df_copy.last_frame_plate == True]
    valid_nn_dist = leaving_plate[leaving_plate.nn_num == 1]
    plt.subplot(3,2,num+1)
    plt.hist(valid_nn_dist.nn_dist.values*um_conv, bins=25, range=[0,2.5])
    plt.title('NN Dist w/ Success L='+str(Ls[num]))
    plt.xlabel('1st NN Distance')
    plt.ylabel('Counts')
plt.tight_layout()
plt.show()

First Nearest Neighbor Over Glass vs Over Plate

Compare histograms of successful jumps off the nanoplate based on where the first neares neighbor is (either on the plate or on the glass). Can the particle get pulled down by a particle already on the glass or does it need to be pushed off from a particle also on the plate?

In [18]:
Ls = [1, 2, 3, 4, 5, 10]
um_conv=6.5/60/1.6/2
plt.figure(figsize=(8,7))
for num, df in enumerate(kinetic_data):
    df_copy = df.copy()
    leaving_plate = df_copy[df_copy.last_frame_plate == True]
    valid_nn_dist = leaving_plate #optical_binding_of_driven_particles.filter_by_n_nn_with_more_d_distance_away(leaving_plate, 1, 1.5)
    valid_nn_dist = valid_nn_dist[valid_nn_dist.nn_num == 1]
    plt.subplot(3,2,num+1)
    valid_nn_dist_plate = valid_nn_dist[valid_nn_dist.nn_over_plate == True]
    valid_nn_dist_glass = valid_nn_dist[valid_nn_dist.nn_over_plate == False]
    plt.hist(valid_nn_dist_plate.nn_dist.values*um_conv, bins=60, range=[0,4], alpha=0.5)
    plt.hist(valid_nn_dist_glass.nn_dist.values*um_conv, bins=60, range=[0,4], alpha=0.5)
    plt.title('NN Dist w/ Success L='+str(Ls[num]))
    plt.xlabel('1st NN Distance')
    plt.ylabel('Counts')
    if num == 5:
        plt.legend(['NN Over Plate', 'NN Over Glass'])
plt.tight_layout()
plt.show()
In [19]:
store.close()